#### Script to do multiple treeMI imputations for all manufacturing industries in the 2007 Census of Manufactures
### and export the imputed datasets

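#### input files:
####   gooddata_all_inds07.csv - establishment-level 2007 CMF data (read into `gooddata`)
####   cmf07_inds.csv          - industry list with columns `number` and `NAICS_NEW_6`

#### output files:
####   imputes_all_inds07.csv  - all imputed datasets stacked together, each tagged with `impsetnum`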

## library() is used instead of require(): require() only warns and returns FALSE
## when the package is missing, which would let the script read the full dataset
## and then fail much later, at the first imputation call.
## NOTE(review): treeMI() is not defined in this file - presumably it is sourced
## or loaded into the session before this script runs; confirm.
library(tree)

### Read in the entire 2007 CMF dataset (where items imputed by industry average
### ratio or univariate regression have been made missing)

gooddata <- read.csv("gooddata_all_inds07.csv", header = TRUE)

## Rearrange the columns into a fixed, known order. The order matters: the
## 10-element vector passed to treeMI() further down lines up with these
## 10 columns position by position.
mygooddata <- gooddata[c(
  "NAICS_NEW_6", "TAE", "CM", "TE", "PH", "SW", "TVS",
  "dinv", "energycmratio", "wwswratio"
)]

## Lookup table of industries; the code below uses its `number` column as a
## running index and its `NAICS_NEW_6` column as the industry code.
industries <- read.csv("cmf07_inds.csv", header = TRUE)

### Loop through industries:
###
### For every industry listed in `industries`, pull that industry's rows out of
### `mygooddata`, report how many values are missing in each variable, and - if
### at least two variables have missing values - draw `n_imputations` treeMI
### imputations and append each one to `output_file`, tagged with its
### imputation number in `impsetnum`.
###
### Changes relative to the earlier version of this section:
###   * The first industry is no longer handled by a separate copy of the code.
###     The output file is (re)created, with its header row, by the first
###     imputation that is actually written, whichever industry that is.
###     Previously the header and the overwrite of any stale output file
###     depended on industry number 1 having enough missing data.
###   * Every treeMI() call now uses the same minDev / startDev. Previously the
###     first imputation of industries 2+ used 0.000001 while all other calls
###     used 0.00001. NOTE(review): 0.00001 was kept because it was used by
###     the other calls; confirm that this is the intended value.
###   * The industry numbers come from the industries file instead of a
###     hard-coded 1..471, and the number of imputations is a named constant.

output_file <- "imputes_all_inds07.csv"
n_imputations <- 100
tree_tolerance <- 0.00001

## One entry per column of `mygooddata`, in column order; only the first
## position (NAICS_NEW_6) is 1.
## NOTE(review): presumably this flags NAICS_NEW_6 as a column treeMI should not
## impute - confirm against the treeMI definition.
col_flags <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0)

## TRUE until the first imputed dataset has been written; that first write
## overwrites the file and emits the column names, all later writes append.
header_pending <- TRUE

for (i in sort(unique(industries$number))) {

  industry <- industries$NAICS_NEW_6[industries$number %in% i]

  ## %in% never yields NA, so records with a missing NAICS code cannot leak
  ## into an industry's data as all-NA rows (which `==` indexing would produce).
  treeData <- mygooddata[mygooddata$NAICS_NEW_6 %in% industry, ]

  ## Number of missing values per variable (kept as integers so the printed
  ## report is formatted exactly as before).
  missing_by_var <- vapply(
    treeData,
    function(column) sum(is.na(column)),
    integer(1)
  )
  for (Var in names(missing_by_var)) {
    print(c(industry, Var, missing_by_var[[Var]]))
  }

  ## We only want to run treeMI if the original data has some missing values.
  ## treeMI will only run if there are at least 2 variables that have at least
  ## one missing value, so count the number of variables with missing values.
  missingvarscounter <- sum(missing_by_var > 0)
  if (missingvarscounter < 2) {
    next
  }

  for (j in seq_len(n_imputations)) {
    imputes <- treeMI(
      treeData, ITER = 5, col_flags,
      starter = TRUE, PPDdraw = FALSE,
      minCut = 5, minDev = tree_tolerance,
      startCut = 5, startDev = tree_tolerance
    )
    imputes$impsetnum <- j

    ## NOTE(review): row names are written (write.table default), so the header
    ## line has one field fewer than the data lines. Left unchanged so the
    ## output layout stays the same for whatever reads this file.
    write.table(
      imputes,
      file = output_file,
      append = !header_pending,
      sep = ",",
      col.names = header_pending
    )
    header_pending <- FALSE
  }
}

if (header_pending) {
  warning(
    "No industry had at least 2 variables with missing values; ",
    output_file, " was not written.",
    call. = FALSE
  )
}
